{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "# Garimpagem de Dados\n",
    "\n",
    "## Aula 3 - k-Nearest Neighbors\n",
    "\n",
    "11/10/2017"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "from sklearn import datasets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "class KNNClassifier(object):\n",
    "    def __init__(self):\n",
    "        self.X_train = None\n",
    "        self.y_train = None\n",
    "\n",
    "    def euc_distance(self, a, b):\n",
    "        return np.linalg.norm(a-b)\n",
    "\n",
    "    def closest(self, row):\n",
    "        \"\"\"\n",
    "        Retorna a classe respondente ao ponto mais próximo do dataset de treino.\\\n",
    "        É um exemplo de implementação do kNN com k=1.\n",
    "        \"\"\"\n",
    "        dists = [self.euc_distance(row, item) for item in self.X_train]\n",
    "        nei = dists.index(min(dists))\n",
    "        return self.y_train[nei]\n",
    "\n",
    "    def fit(self, training_data, training_labels):\n",
    "        self.X_train = training_data\n",
    "        self.y_train = training_labels\n",
    "\n",
    "    def predict(self, to_classify):\n",
    "        predictions = []\n",
    "        for row in to_classify:\n",
    "            label = self.closest(row)\n",
    "            predictions.append(label)\n",
    "        return predictions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "iris = datasets.load_iris()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['setosa' 'versicolor' 'virginica']\n"
     ]
    }
   ],
   "source": [
    "print(iris.target_names)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "X = iris.data\n",
    "y = iris.target"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[ 5.1  3.5  1.4  0.2]\n",
      " [ 4.9  3.   1.4  0.2]\n",
      " [ 4.7  3.2  1.3  0.2]\n",
      " [ 4.6  3.1  1.5  0.2]\n",
      " [ 5.   3.6  1.4  0.2]\n",
      " [ 5.4  3.9  1.7  0.4]\n",
      " [ 4.6  3.4  1.4  0.3]\n",
      " [ 5.   3.4  1.5  0.2]\n",
      " [ 4.4  2.9  1.4  0.2]\n",
      " [ 4.9  3.1  1.5  0.1]\n",
      " [ 5.4  3.7  1.5  0.2]\n",
      " [ 4.8  3.4  1.6  0.2]\n",
      " [ 4.8  3.   1.4  0.1]\n",
      " [ 4.3  3.   1.1  0.1]\n",
      " [ 5.8  4.   1.2  0.2]\n",
      " [ 5.7  4.4  1.5  0.4]\n",
      " [ 5.4  3.9  1.3  0.4]\n",
      " [ 5.1  3.5  1.4  0.3]\n",
      " [ 5.7  3.8  1.7  0.3]\n",
      " [ 5.1  3.8  1.5  0.3]\n",
      " [ 5.4  3.4  1.7  0.2]\n",
      " [ 5.1  3.7  1.5  0.4]\n",
      " [ 4.6  3.6  1.   0.2]\n",
      " [ 5.1  3.3  1.7  0.5]\n",
      " [ 4.8  3.4  1.9  0.2]\n",
      " [ 5.   3.   1.6  0.2]\n",
      " [ 5.   3.4  1.6  0.4]\n",
      " [ 5.2  3.5  1.5  0.2]\n",
      " [ 5.2  3.4  1.4  0.2]\n",
      " [ 4.7  3.2  1.6  0.2]\n",
      " [ 4.8  3.1  1.6  0.2]\n",
      " [ 5.4  3.4  1.5  0.4]\n",
      " [ 5.2  4.1  1.5  0.1]\n",
      " [ 5.5  4.2  1.4  0.2]\n",
      " [ 4.9  3.1  1.5  0.1]\n",
      " [ 5.   3.2  1.2  0.2]\n",
      " [ 5.5  3.5  1.3  0.2]\n",
      " [ 4.9  3.1  1.5  0.1]\n",
      " [ 4.4  3.   1.3  0.2]\n",
      " [ 5.1  3.4  1.5  0.2]\n",
      " [ 5.   3.5  1.3  0.3]\n",
      " [ 4.5  2.3  1.3  0.3]\n",
      " [ 4.4  3.2  1.3  0.2]\n",
      " [ 5.   3.5  1.6  0.6]\n",
      " [ 5.1  3.8  1.9  0.4]\n",
      " [ 4.8  3.   1.4  0.3]\n",
      " [ 5.1  3.8  1.6  0.2]\n",
      " [ 4.6  3.2  1.4  0.2]\n",
      " [ 5.3  3.7  1.5  0.2]\n",
      " [ 5.   3.3  1.4  0.2]\n",
      " [ 7.   3.2  4.7  1.4]\n",
      " [ 6.4  3.2  4.5  1.5]\n",
      " [ 6.9  3.1  4.9  1.5]\n",
      " [ 5.5  2.3  4.   1.3]\n",
      " [ 6.5  2.8  4.6  1.5]\n",
      " [ 5.7  2.8  4.5  1.3]\n",
      " [ 6.3  3.3  4.7  1.6]\n",
      " [ 4.9  2.4  3.3  1. ]\n",
      " [ 6.6  2.9  4.6  1.3]\n",
      " [ 5.2  2.7  3.9  1.4]\n",
      " [ 5.   2.   3.5  1. ]\n",
      " [ 5.9  3.   4.2  1.5]\n",
      " [ 6.   2.2  4.   1. ]\n",
      " [ 6.1  2.9  4.7  1.4]\n",
      " [ 5.6  2.9  3.6  1.3]\n",
      " [ 6.7  3.1  4.4  1.4]\n",
      " [ 5.6  3.   4.5  1.5]\n",
      " [ 5.8  2.7  4.1  1. ]\n",
      " [ 6.2  2.2  4.5  1.5]\n",
      " [ 5.6  2.5  3.9  1.1]\n",
      " [ 5.9  3.2  4.8  1.8]\n",
      " [ 6.1  2.8  4.   1.3]\n",
      " [ 6.3  2.5  4.9  1.5]\n",
      " [ 6.1  2.8  4.7  1.2]\n",
      " [ 6.4  2.9  4.3  1.3]\n",
      " [ 6.6  3.   4.4  1.4]\n",
      " [ 6.8  2.8  4.8  1.4]\n",
      " [ 6.7  3.   5.   1.7]\n",
      " [ 6.   2.9  4.5  1.5]\n",
      " [ 5.7  2.6  3.5  1. ]\n",
      " [ 5.5  2.4  3.8  1.1]\n",
      " [ 5.5  2.4  3.7  1. ]\n",
      " [ 5.8  2.7  3.9  1.2]\n",
      " [ 6.   2.7  5.1  1.6]\n",
      " [ 5.4  3.   4.5  1.5]\n",
      " [ 6.   3.4  4.5  1.6]\n",
      " [ 6.7  3.1  4.7  1.5]\n",
      " [ 6.3  2.3  4.4  1.3]\n",
      " [ 5.6  3.   4.1  1.3]\n",
      " [ 5.5  2.5  4.   1.3]\n",
      " [ 5.5  2.6  4.4  1.2]\n",
      " [ 6.1  3.   4.6  1.4]\n",
      " [ 5.8  2.6  4.   1.2]\n",
      " [ 5.   2.3  3.3  1. ]\n",
      " [ 5.6  2.7  4.2  1.3]\n",
      " [ 5.7  3.   4.2  1.2]\n",
      " [ 5.7  2.9  4.2  1.3]\n",
      " [ 6.2  2.9  4.3  1.3]\n",
      " [ 5.1  2.5  3.   1.1]\n",
      " [ 5.7  2.8  4.1  1.3]\n",
      " [ 6.3  3.3  6.   2.5]\n",
      " [ 5.8  2.7  5.1  1.9]\n",
      " [ 7.1  3.   5.9  2.1]\n",
      " [ 6.3  2.9  5.6  1.8]\n",
      " [ 6.5  3.   5.8  2.2]\n",
      " [ 7.6  3.   6.6  2.1]\n",
      " [ 4.9  2.5  4.5  1.7]\n",
      " [ 7.3  2.9  6.3  1.8]\n",
      " [ 6.7  2.5  5.8  1.8]\n",
      " [ 7.2  3.6  6.1  2.5]\n",
      " [ 6.5  3.2  5.1  2. ]\n",
      " [ 6.4  2.7  5.3  1.9]\n",
      " [ 6.8  3.   5.5  2.1]\n",
      " [ 5.7  2.5  5.   2. ]\n",
      " [ 5.8  2.8  5.1  2.4]\n",
      " [ 6.4  3.2  5.3  2.3]\n",
      " [ 6.5  3.   5.5  1.8]\n",
      " [ 7.7  3.8  6.7  2.2]\n",
      " [ 7.7  2.6  6.9  2.3]\n",
      " [ 6.   2.2  5.   1.5]\n",
      " [ 6.9  3.2  5.7  2.3]\n",
      " [ 5.6  2.8  4.9  2. ]\n",
      " [ 7.7  2.8  6.7  2. ]\n",
      " [ 6.3  2.7  4.9  1.8]\n",
      " [ 6.7  3.3  5.7  2.1]\n",
      " [ 7.2  3.2  6.   1.8]\n",
      " [ 6.2  2.8  4.8  1.8]\n",
      " [ 6.1  3.   4.9  1.8]\n",
      " [ 6.4  2.8  5.6  2.1]\n",
      " [ 7.2  3.   5.8  1.6]\n",
      " [ 7.4  2.8  6.1  1.9]\n",
      " [ 7.9  3.8  6.4  2. ]\n",
      " [ 6.4  2.8  5.6  2.2]\n",
      " [ 6.3  2.8  5.1  1.5]\n",
      " [ 6.1  2.6  5.6  1.4]\n",
      " [ 7.7  3.   6.1  2.3]\n",
      " [ 6.3  3.4  5.6  2.4]\n",
      " [ 6.4  3.1  5.5  1.8]\n",
      " [ 6.   3.   4.8  1.8]\n",
      " [ 6.9  3.1  5.4  2.1]\n",
      " [ 6.7  3.1  5.6  2.4]\n",
      " [ 6.9  3.1  5.1  2.3]\n",
      " [ 5.8  2.7  5.1  1.9]\n",
      " [ 6.8  3.2  5.9  2.3]\n",
      " [ 6.7  3.3  5.7  2.5]\n",
      " [ 6.7  3.   5.2  2.3]\n",
      " [ 6.3  2.5  5.   1.9]\n",
      " [ 6.5  3.   5.2  2. ]\n",
      " [ 6.2  3.4  5.4  2.3]\n",
      " [ 5.9  3.   5.1  1.8]]\n",
      "[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
      " 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1\n",
      " 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2\n",
      " 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2\n",
      " 2 2]\n",
      "600\n"
     ]
    }
   ],
   "source": [
    "print(X)\n",
    "print(y)\n",
    "print(X.size)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from sklearn.model_selection import train_test_split"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "knn = KNNClassifier()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "knn.fit(X_train, y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "result = knn.predict(X_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0, 0, 2, 2, 0, 1, 0, 0, 0, 1, 2, 2, 2, 0, 0, 0, 2, 2, 0, 0, 0, 1, 1, 2, 1, 2, 0, 2, 2, 1, 2, 2, 2, 0, 1, 2, 0, 2, 1, 1, 1, 1, 1, 1, 0]\n"
     ]
    }
   ],
   "source": [
    "print(result)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0 0 2 1 0 1 0 0 0 1 2 2 1 0 0 0 2 2 0 0 0 1 1 2 2 2 0 1 2 1 2 2 2 0 1 2 0\n",
      " 2 1 1 1 1 1 1 0]\n"
     ]
    }
   ],
   "source": [
    "print(y_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from sklearn import metrics"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "score = metrics.accuracy_score(y_pred=result, y_true=y_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.911111\n"
     ]
    }
   ],
   "source": [
    "print('{0:f}'.format(score))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from sklearn.neighbors import "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "knn1 = KNeighborsClassifier(n_neighbors=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',\n",
       "           metric_params=None, n_jobs=1, n_neighbors=1, p=2,\n",
       "           weights='uniform')"
      ]
     },
     "execution_count": 46,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "knn1.fit(X_train, y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "result1 = knn1.predict(X_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "score1 = metrics.accuracy_score(result1, y_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.911111\n"
     ]
    }
   ],
   "source": [
    "print('{0:f}'.format(score1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
